# import the necessary functions
from trafilatura import fetch_url, extract

# address of the web page used in this example
URL = 'https://github.blog/2019-03-29-leader-spotlight-erin-spiceland/'

# grab an HTML file to extract data from
downloaded = fetch_url(URL)

# per the trafilatura documentation, fetch_url() returns None when the
# download fails; stop with a clear message instead of passing None on
# to extract()
if downloaded is None:
    raise SystemExit(f'Could not download {URL}')

# output main content and comments as plain text
result = extract(downloaded)

print(result)

# change the output format to XML (allowing for preservation of document structure)
# result = extract(downloaded, output_format="xml")

# discard potential comments and change the output to JSON
# extract(downloaded, output_format="json", include_comments=False)

# from trafilatura import html2txt
# html2txt(downloaded)